Set up

Packages

require(tidyr)
require(dplyr)
require(magrittr)
require(ggplot2)
require(bayesplot)
require(ape)
require(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
require(brms)
require(ggmcmc)
require(knitr)
require(phytools)
require(cowplot)
require(RColorBrewer)
require(ggtree)
require(car)
require(xtable)

# NRMSE function
nrmse <-  function(obs, pred, type = "sd") {
  
    squared_sums <- sum((obs - pred)^2)
    mse <- squared_sums/length(obs)
    rmse <- sqrt(mse)
    if (type == "sd") nrmse <- rmse/sd(obs)
    if (type == "mean") nrmse <- rmse/mean(obs)
    if (type == "maxmin") nrmse <- rmse/ (max(obs) - min(obs))
    if (type == "iq") nrmse <- rmse/ (quantile(obs, 0.75) - quantile(obs, 0.25))
    if (!type %in% c("mean", "sd", "maxmin", "iq")) message("Wrong type!")
    nrmse <- round(nrmse, 3)
 return(nrmse)
    
}

Loading data

data <- read.csv("./sp.rare.inv.csv", as.is=F)

# one family has more rare species than species
data[data$rare>data$species,]

# increasing species by 1 to match
data$species[data$family=="Metteniusaceae"] <- 8

nrow(data)# 407
range(data$vetted)
nrow(data[data$vetted>0,])# 260

# creating column for woodiness
data$woodiness <- data$herbaceous + data$woody + data$variable

# fixing typo in names 
names(data)[names(data)%in%"dmonoecious"] <- "monoecious"

### Zanne tree & generating tree representing families
tree <- read.tree("./Vascular_Plants_rooted.dated.tre")
spec_fam <- read.csv("./spec.fam.csv", as.is=T)
tree <- drop.tip(tree, setdiff(tree$tip.label,as.character(spec_fam$sp)))
lookup <- setNames(spec_fam$family, spec_fam$sp)
tree$tip.label <- as.character(lookup[tree$tip.label])

# Getting new family age
min_above_zero <- function(x){

  if (length(x)>1) {
    min(x[x>0])
  } else {0}

}

# Calculate family age (may be influenced by families missing from the tree)
coph <- cophenetic(tree)

fam_age <- sapply(rownames(coph), function(fam) 
          min_above_zero(coph[rownames(coph)%in%fam,])
          ) 

new_ages <- data.frame(family=names(fam_age), new_age=as.numeric(fam_age)/2)
new_ages %<>% arrange(family)
data <- left_join(data,new_ages)

# There are some ages of NA (not in tree?)
# Remove these
data <- data[!is.na(data$new_age),]

# Calculating simple diversification metric - log(N+1)/age
data$diversification <- with(data, log(species)/new_age)

# creating another family level variable (for non-phylogenetic family level effects)
data$family_name <- data$family

# These families we have data for, but are not in tree
setdiff(data$family, tree$tip.label) 
# none

# Identify families in tree with no data
setdiff(tree$tip.label, data$family)
# Centrolepidaceae

# remove missing families in data
data <- data[data$family%in%tree$tip.label,]

# remove families in tree with no data 
tree <- drop.tip(tree, setdiff(tree$tip.label, as.character(data$family)))

# phylogenetic correlation structure
phy_cov <- vcv(tree, corr=TRUE)

Transforming predictors

# Code for scale and unscale with mean=0 sd=0.5 (following Gelman 2008)
scale_half <- function(x){
  return( (x-mean(x)) * 0.5/sd(x))
}

unscale_half <- function(x,original){
    x*sd(original)*2 + mean(original)  
}

# Converting species.hybrids to proportion
data$species.hybrids <- data$species.hybrids/data$species

# Scaling continuous predictors
data$new_age_scaled <- scale_half(log(data$new_age))
data$species_scaled <- scale_half(log(data$species))
data$diversification_scaled <- scale_half(log(data$diversification+1))
data$Naturalised_scaled <- scale_half(log(data$Naturalised+1))
data$species.hybrids <- scale_half(data$species.hybrids)

# Subsetting to IUCN vetted species (for rarity models)
data_vetted <- data[data$vetted>0,]
tree_vetted <- drop.tip(tree, setdiff(tree$tip.label, as.character(data_vetted$family)))
phy_cov_vetted <- vcv(tree_vetted, corr=TRUE)

Assessing multicollinearity

vif(lm(rare ~ Naturalised_scaled + diversification_scaled + new_age_scaled, data=data_vetted))
##     Naturalised_scaled diversification_scaled         new_age_scaled 
##               2.183677               4.036088               2.685096
vif(lm(rare ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal, data=data_vetted))
## diversification_scaled         new_age_scaled              woodiness 
##               3.695686               2.572373               1.987190 
##             herbaceous            climate.sum        species.hybrids 
##               1.682411               1.669569               1.125479 
##         hermaphroditic              dioecious             monoecious 
##               1.592911               1.286303               1.354939 
##                asexual           fleshy.fruit                 animal 
##               1.227108               1.128379               1.397909
vif(lm(rare ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal, data=data))
## diversification_scaled         new_age_scaled              woodiness 
##               2.922019               1.719045               1.921781 
##             herbaceous            climate.sum        species.hybrids 
##               1.533170               1.714742               1.115131 
##         hermaphroditic              dioecious             monoecious 
##               1.666352               1.484602               1.309214 
##                asexual           fleshy.fruit                 animal 
##               1.269580               1.066365               1.246033

   

Rarity models (with IUCN evaluated species)

Simple rarity model

if (!file.exists("./vet_rare_naturalized_10k.rds")) {

  vet_rare_nat <- brm(rare | trials(vetted) ~ Naturalised_scaled + diversification_scaled + new_age_scaled +  (1|family) + (1|family_name), 
    dat=data_vetted, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
    control=list(adapt_delta=0.95,max_treedepth=12), cores=4)

  saveRDS(vet_rare_nat, "./vet_rare_naturalized_10k.rds")

} else { vet_rare_nat <- readRDS("./vet_rare_naturalized_10k.rds")}

outcome <- summary(vet_rare_nat$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Number of naturalized species","Diversification rate","Family age","SD brownian effects","SD family specific effects")

vet_rare_nat_plot <- stanplot(vet_rare_nat, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
vet_rare_nat_plot

# pdf("./plots_tables/vet_rare_nat_plot.pdf", width=5, height=4)
# vet_rare_nat_plot + ggtitle("")
# dev.off()

rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Number of naturalized species -0.97 0.00 0.21 -1.38 -1.11 -0.97 -0.84 -0.55 4154.70 1.00
Diversification rate 1.66 0.01 0.35 0.98 1.42 1.66 1.90 2.35 4452.05 1.00
Family age 1.32 0.01 0.33 0.67 1.10 1.32 1.54 1.98 3989.45 1.00
SD brownian effects 0.98 0.01 0.20 0.58 0.84 0.98 1.11 1.38 892.78 1.00
SD family specific effects 0.60 0.01 0.14 0.26 0.52 0.61 0.70 0.84 711.91 1.01
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_rare_nat.tex")

##Posterior predictive checks
pp_vet_rare_nat <- brms::posterior_predict(vet_rare_nat)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_rare_nat$data$rare, pp_vet_rare_nat[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_vet_rare_nat <- apply(pp_vet_rare_nat, 1, function(x) nrmse(pred=x,obs=vet_rare_nat$data$rare, type="iq"))
mean(nrmse_vet_rare_nat)
## [1] 0.1646362
sd(nrmse_vet_rare_nat)
## [1] 0.01917734
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(vet_rare_nat, hyp, class = NULL))
## Hypothesis Tests for class :
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0      0.7      0.16     0.34     0.96         NA
##   Post.Prob Star
## 1        NA    *
## ---
## '*': The expected value under the hypothesis lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

Full rarity model (IUCN vetted species)

if (!file.exists("./vet_rare_10k.rds")) {

  vet_rare <- brm(rare | trials(vetted) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family) + (1|family_name), 
    dat=data_vetted, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov_vetted),
    control=list(adapt_delta=0.95,max_treedepth=13))

  saveRDS(vet_rare, "./vet_rare_10k.rds")

} else { vet_rare <- readRDS("./vet_rare_10k.rds")}

# prior_summary(vet_rare, all=FALSE)

outcome <- summary(vet_rare$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age","Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects","SD family specific effects")

vet_rare_plot <- stanplot(vet_rare, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
vet_rare_plot

# pdf("./plots_tables/vet_rare_plot.pdf", width=5, height=4)
# vet_rare_plot + ggtitle("")
# dev.off()

rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 0.47 0.00 0.33 -0.16 0.25 0.47 0.69 1.12 4909.12 1.00
Famly age 0.42 0.00 0.30 -0.18 0.21 0.42 0.62 1.01 4801.86 1.00
Woodiness 0.61 0.00 0.13 0.34 0.53 0.62 0.70 0.87 4192.85 1.00
Herbaceous -1.05 0.00 0.23 -1.48 -1.21 -1.06 -0.90 -0.58 3416.65 1.00
Climate sum -0.26 0.00 0.09 -0.45 -0.32 -0.26 -0.20 -0.08 4883.19 1.00
Number of species hybrids -0.18 0.00 0.13 -0.43 -0.26 -0.18 -0.09 0.08 5035.53 1.00
Hermaphroditic 0.13 0.00 0.23 -0.31 -0.02 0.14 0.29 0.58 5219.81 1.00
Dioecious -0.18 0.00 0.15 -0.48 -0.28 -0.18 -0.08 0.13 4815.15 1.00
Monoecious -0.14 0.00 0.16 -0.45 -0.25 -0.14 -0.04 0.16 4594.80 1.00
Asexual 0.04 0.00 0.16 -0.29 -0.07 0.04 0.15 0.36 4727.05 1.00
Fleshy fruit 0.03 0.00 0.15 -0.26 -0.08 0.03 0.14 0.33 4478.59 1.00
Animal pollinated 0.54 0.00 0.26 0.04 0.37 0.54 0.72 1.05 4724.09 1.00
SD brownian effects 0.52 0.01 0.27 0.03 0.31 0.51 0.71 1.06 578.59 1.01
SD family specific effects 0.74 0.00 0.12 0.48 0.67 0.75 0.82 0.92 754.15 1.01
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_rare.tex")

##Posterior predictive checks
# first make posterior predictions
pp_rare <- brms::posterior_predict(vet_rare)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_rare$data$rare, pp_rare[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_vet <- apply(pp_rare, 1, function(x) nrmse(pred=x,obs=vet_rare$data$rare, type="iq"))
mean(nrmse_vet)
## [1] 0.1601284
sd(nrmse_vet)
## [1] 0.01900533
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(vet_rare, hyp, class = NULL))
## Hypothesis Tests for class :
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0     0.34      0.24        0     0.82         NA
##   Post.Prob Star
## 1        NA    *
## ---
## '*': The expected value under the hypothesis lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
# plot(hyp)

Naturalized model

if (!file.exists("./fit_nat_10k.rds")) {

  fit_nat <- brm(Naturalised | trials(species) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family) + (1|family_name), 
    dat=data, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov),
    control=list(adapt_delta=0.8, max_treedepth=11))

  saveRDS(fit_nat, "./fit_nat_10k.rds")

} else { fit_nat <- readRDS("./fit_nat_10k.rds")}

outcome <- summary(fit_nat$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects","SD family specific effects")

fit_nat_plot <- stanplot(fit_nat, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))+ ggtitle("Proportion of naturalized species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
fit_nat_plot

# pdf("./plots_tables/fit_nat_plot.pdf", width=5, height=4)
# fit_nat_plot + ggtitle("")
# dev.off()

rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate -1.56 0.00 0.29 -2.13 -1.76 -1.56 -1.36 -1.00 4629.30 1.00
Famly age -1.10 0.00 0.31 -1.71 -1.31 -1.10 -0.90 -0.49 4655.53 1.00
Woodiness 0.04 0.00 0.14 -0.23 -0.06 0.04 0.14 0.31 4002.18 1.00
Herbaceous 0.75 0.00 0.23 0.30 0.59 0.75 0.90 1.21 4074.05 1.00
Climate sum 0.30 0.00 0.10 0.11 0.24 0.31 0.37 0.50 4688.40 1.00
Number of species hybrids 0.40 0.00 0.12 0.16 0.32 0.40 0.48 0.64 3897.79 1.00
Hermaphroditic 0.09 0.00 0.24 -0.40 -0.07 0.09 0.25 0.56 4303.71 1.00
Dioecious -0.02 0.00 0.17 -0.35 -0.14 -0.02 0.09 0.32 3481.80 1.00
Monoecious -0.01 0.00 0.17 -0.35 -0.13 -0.01 0.10 0.32 4596.19 1.00
Asexual 0.49 0.00 0.18 0.14 0.37 0.49 0.61 0.85 4428.52 1.00
Fleshy fruit -0.09 0.00 0.14 -0.37 -0.19 -0.09 0.01 0.19 4782.99 1.00
Animal pollinated -0.50 0.00 0.29 -1.06 -0.69 -0.50 -0.31 0.07 4695.63 1.00
SD brownian effects 1.53 0.01 0.18 1.14 1.42 1.55 1.66 1.84 730.89 1.01
SD family specific effects 0.36 0.01 0.21 0.02 0.19 0.36 0.51 0.76 528.37 1.01
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/fit_nat.tex")

##Posterior predictive checks
pp_nat <- brms::posterior_predict(fit_nat)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=fit_nat$data$Naturalised, pp_nat[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_nat <- apply(pp_nat, 1, function(x) nrmse(pred=x,obs=fit_nat$data$Naturalised, type="iq"))
mean(nrmse_nat)
## [1] 0.5035252
sd(nrmse_nat)
## [1] 0.07130383
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(fit_nat, hyp, class = NULL))
## Hypothesis Tests for class :
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0     0.92      0.08      0.7        1         NA
##   Post.Prob Star
## 1        NA    *
## ---
## '*': The expected value under the hypothesis lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
# simplified threat model
outcome <- summary(vet_rare_nat$fit, prob=c(0.1,0.9))
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]
to_print <- data.frame(to_print)

names1 <- c("Number of naturalized species","Diversification rate","Family age","SD Brownian effects","SD family specific effects")
to_print$variable <- as.factor(names1)


p <-  ggplot(to_print, aes(x=variable, y=mean)) + scale_x_discrete(limits = rev(    levels(to_print$variable))) +
      geom_pointrange(aes(ymin=X10., ymax=X90.)) + coord_flip()  + 
      theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black")) + 
      geom_hline(yintercept = 0) + xlab("") + ylab("") +
      theme(legend.title = element_blank()) 
p

# ggsave("./plots_tables/threat_nat_forest.pdf", p , height=3,width=6)
# ggsave("./plots_tables/threat_nat_forest.png", p , height=3,width=6)


# combined Threat and Naturalized
threat_out <- summary(vet_rare$fit, prob=c(0.1,0.9))
threat_out <- threat_out$summary[grep("^[sd,b]_*",rownames(threat_out$summary)),]
threat_out <- threat_out[-1,]
threat_out <- data.frame(threat_out)
threat_out$group <- rev(letters[1:nrow(threat_out)])
threat_out$model <- "Threatened"

nat_out <- summary(fit_nat$fit, prob=c(0.1,0.9))
nat_out <- nat_out$summary[grep("^[sd,b]_*",rownames(nat_out$summary)),]
nat_out <- nat_out[-1,]
nat_out <- data.frame(nat_out)
nat_out$group <- rev(letters[1:nrow(nat_out)])
nat_out$model <- "Naturalized"

labels=c("Diversification rate","Family age","Variation in growth form","Herbaceous","Climate range variability","Species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual seed production","Fleshy fruits","Animal pollination","SD Brownian effects","SD family specific effects")

threat_out$variable <- labels
nat_out$variable <- labels

coefs <- rbind(threat_out, nat_out)

coefs$overlap <- c( "n","n","n","n","n","n","y","y","y","y","y","n","n","n",
                    "n","n","y","n","n","n","y","y","y","n","y","n","n","n")

coefs$opposite <- c("y","y","n","y","y","y","n","n","n","n","n","y","n","n",
                    "y","y","n","y","y","y","n","n","n","n","n","y","n","n")

coefs$both <- c("ny","ny","nn","ny","ny","ny","yn","yn","yn","yn","yn","ny","nn","nn",
                "ny","ny","yn","ny","ny","ny","yn","yn","yn","nn","yn","ny","nn","nn")

labels<-rev(labels)

cols<-c("#88b7c5","#a36666")

p2 <- ggplot(coefs, aes(x=group, y=mean, color=model)) + scale_shape_manual(values=c(17,15,17)) + scale_alpha_manual(values=c(1,1,0.4)) +
  geom_pointrange(aes(ymin=X10., ymax=X90.,shape=both,alpha=both)) + coord_flip()  + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.background = element_blank(), axis.line = element_line(colour = "black")) + 
  geom_hline(yintercept = 0)  + xlab("") + ylab("") + theme(legend.position = "none") +
theme(legend.title = element_blank()) + scale_x_discrete(labels=labels) + guides(shape = FALSE, size = FALSE) + scale_colour_manual(values=cols)

p2             

# ggsave("./plots_tables/combined_forest.pdf", p2 , height=5,width=6)
# ggsave("./plots_tables/combined_forest.png", p2 , height=5,width=6)

Sensitivity analyses

Full rarity model: no family level effects

if (!file.exists("./vet_rare_nofam_10k.rds")) {

  vet_rare_nofam <- brm(rare | trials(vetted) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal, 
    dat=data_vetted, family=binomial(), 
    iter=10000, thin=4,
    control=list(adapt_delta=0.95,max_treedepth=13))

  saveRDS(vet_rare_nofam, "./vet_rare_nofam_10k.rds")

} else { vet_rare_nofam <- readRDS("./vet_rare_nofam_10k.rds")}

outcome <- summary(vet_rare_nofam$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated")

vet_rare_nofam_plot <- stanplot(vet_rare_nofam, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
vet_rare_nofam_plot

# pdf("./plots_tables/vet_rare_nofam_plot.pdf", width=5, height=4)
# vet_rare_nofam_plot + ggtitle("")
# dev.off()

rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 0.18 0 0.09 0.01 0.12 0.18 0.24 0.35 4659.32 1
Famly age 0.21 0 0.08 0.05 0.15 0.21 0.27 0.38 4690.68 1
Woodiness 0.50 0 0.03 0.44 0.48 0.50 0.52 0.56 4826.35 1
Herbaceous -0.71 0 0.05 -0.81 -0.74 -0.71 -0.67 -0.60 4832.17 1
Climate sum -0.37 0 0.02 -0.42 -0.38 -0.37 -0.35 -0.32 4897.05 1
Number of species hybrids -0.15 0 0.04 -0.24 -0.18 -0.15 -0.12 -0.07 5069.49 1
Hermaphroditic 0.19 0 0.06 0.07 0.15 0.19 0.23 0.31 5160.68 1
Dioecious 0.12 0 0.03 0.05 0.10 0.12 0.14 0.19 4611.11 1
Monoecious -0.26 0 0.04 -0.33 -0.28 -0.26 -0.23 -0.19 4617.61 1
Asexual 0.18 0 0.03 0.11 0.15 0.18 0.20 0.25 5147.77 1
Fleshy fruit -0.24 0 0.02 -0.28 -0.25 -0.24 -0.23 -0.20 4797.91 1
Animal pollinated 0.92 0 0.06 0.80 0.88 0.92 0.96 1.04 4518.48 1
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_rare_nofam.tex")


##Posterior predictive checks
# first make posterior predictions
pp_rare_nofam <- brms::posterior_predict(vet_rare_nofam)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_rare_nofam$data$rare, pp_rare_nofam[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_rare_nofam <- apply(pp_rare_nofam, 1, function(x) nrmse(pred=x,obs=vet_rare_nofam$data$rare, type="iq"))
mean(nrmse_rare_nofam)
## [1] 0.6575454
sd(nrmse_rare_nofam)
## [1] 0.03522097

Full rarity model: only non-brownian family effects

if (!file.exists("./vet_rare_nophylo_10k.rds")) {

  vet_rare_nophylo <- brm(rare | trials(vetted) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family_name), 
    dat=data_vetted, family=binomial(), 
    iter=10000, thin=4,
    control=list(adapt_delta=0.95,max_treedepth=13))

  saveRDS(vet_rare_nophylo, "./vet_rare_nophylo_10k.rds")

} else { vet_rare_nophylo <- readRDS("./vet_rare_nophylo_10k.rds")}

outcome <- summary(vet_rare_nophylo$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD family specific effects")

vet_rare_nophylo_plot <- stanplot(vet_rare_nophylo, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
vet_rare_nophylo_plot

# pdf("./plots_tables/vet_rare_nophylo_plot.pdf", width=5, height=4)
# vet_rare_nophylo_plot + ggtitle("")
# dev.off()


rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 0.48 0.01 0.32 -0.15 0.26 0.48 0.69 1.12 3449.85 1
Famly age 0.49 0.00 0.28 -0.06 0.30 0.49 0.68 1.05 4074.19 1
Woodiness 0.65 0.00 0.12 0.41 0.57 0.65 0.73 0.89 3615.24 1
Herbaceous -1.17 0.00 0.19 -1.54 -1.30 -1.17 -1.04 -0.80 3453.06 1
Climate sum -0.27 0.00 0.09 -0.45 -0.33 -0.27 -0.21 -0.09 3180.37 1
Number of species hybrids -0.18 0.00 0.13 -0.44 -0.26 -0.17 -0.09 0.09 4328.42 1
Hermaphroditic 0.13 0.00 0.23 -0.31 -0.03 0.13 0.29 0.57 3397.32 1
Dioecious -0.18 0.00 0.15 -0.49 -0.29 -0.18 -0.08 0.11 3369.20 1
Monoecious -0.15 0.00 0.16 -0.46 -0.26 -0.15 -0.04 0.16 3130.92 1
Asexual 0.06 0.00 0.16 -0.27 -0.05 0.06 0.16 0.38 3148.09 1
Fleshy fruit 0.04 0.00 0.15 -0.27 -0.06 0.04 0.14 0.34 4320.36 1
Animal pollinated 0.62 0.00 0.24 0.16 0.46 0.62 0.79 1.09 3738.85 1
SD family specific effects 0.84 0.00 0.06 0.73 0.80 0.84 0.88 0.97 3665.59 1
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_rare_nophylo.tex")


##Posterior predictive checks
pp_rare_nophylo <- brms::posterior_predict(vet_rare_nophylo)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_rare_nophylo$data$rare, pp_rare_nophylo[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_rare_nophylo <- apply(pp_rare_nophylo, 1, function(x) nrmse(pred=x,obs=vet_rare_nophylo$data$rare, type="iq"))
mean(nrmse_rare_nophylo)
## [1] 0.1599722
sd(nrmse_rare_nophylo)
## [1] 0.01881821

Full rarity model: only brownian family effects

if (!file.exists("./vet_rare_phylo_10k.rds")) {

  vet_rare_phylo <- brm(rare | trials(vetted) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal+ (1|family), 
    dat=data_vetted, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov),
    control=list(adapt_delta=0.95,max_treedepth=13))

  saveRDS(vet_rare_phylo, "./vet_rare_phylo_10k.rds")

} else { vet_rare_phylo <- readRDS("./vet_rare_phylo_10k.rds")}

outcome <- summary(vet_rare_phylo$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects")

vet_rare_phylo_plot <- stanplot(vet_rare_phylo, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
vet_rare_phylo_plot

# pdf("./plots_tables/vet_rare_phylo_plot.pdf", width=5, height=4)
# vet_rare_phylo_plot + ggtitle("")
# dev.off()

rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 0.56 0.01 0.33 -0.08 0.34 0.57 0.79 1.21 4148.29 1
Famly age 0.36 0.01 0.33 -0.30 0.13 0.36 0.60 1.00 4148.20 1
Woodiness 0.57 0.00 0.14 0.31 0.48 0.57 0.66 0.85 3461.78 1
Herbaceous -0.90 0.00 0.23 -1.34 -1.05 -0.90 -0.75 -0.45 3554.01 1
Climate sum -0.24 0.00 0.09 -0.42 -0.30 -0.24 -0.18 -0.06 3688.69 1
Number of species hybrids -0.18 0.00 0.12 -0.43 -0.27 -0.18 -0.10 0.05 4822.50 1
Hermaphroditic 0.10 0.00 0.23 -0.35 -0.05 0.10 0.25 0.55 3658.94 1
Dioecious -0.19 0.00 0.15 -0.48 -0.29 -0.19 -0.10 0.10 3729.29 1
Monoecious -0.10 0.00 0.15 -0.39 -0.20 -0.10 0.01 0.20 3670.84 1
Asexual 0.00 0.00 0.16 -0.31 -0.10 0.00 0.11 0.32 3637.14 1
Fleshy fruit -0.09 0.00 0.12 -0.33 -0.17 -0.09 0.00 0.16 4398.84 1
Animal pollinated 0.44 0.00 0.26 -0.06 0.27 0.44 0.62 0.96 4281.31 1
SD brownian effects 1.31 0.00 0.10 1.13 1.24 1.31 1.38 1.52 3387.73 1
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/vet_rare_phylo.tex")


##Posterior predictive checks
pp_rare_phylo <- brms::posterior_predict(vet_rare_phylo)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=vet_rare_phylo$data$rare, pp_rare_phylo[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_rare_phylo <- apply(pp_rare_phylo, 1, function(x) nrmse(pred=x,obs=vet_rare_phylo$data$rare, type="iq"))
mean(nrmse_rare_phylo)
## [1] 0.1602382
sd(nrmse_rare_phylo)
## [1] 0.01897218

Full rarity model (total species richness - not vetted by IUCN)

if (!file.exists("./nonvet_rare_10k.rds")) {

  nonvet_rare <- brm(rare | trials(species) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family) + (1|family_name), 
    dat=data, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov),
    control=list(adapt_delta=0.95,max_treedepth=13))

  saveRDS(nonvet_rare, "./nonvet_rare_10k.rds")

} else { nonvet_rare <- readRDS("./nonvet_rare_10k.rds")}

outcome <- summary(nonvet_rare$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects","SD family specific effects")

nonvet_rare_plot <- stanplot(nonvet_rare, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print))) + ggtitle("Proportion rare species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
nonvet_rare_plot

# pdf("./plots_tables/nonvet_rare_plot.pdf", width=5, height=4)
# nonvet_rare_plot + ggtitle("")
# dev.off()

rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate 0.69 0.01 0.36 0.00 0.45 0.68 0.94 1.40 3861.43 1.00
Famly age 0.51 0.00 0.32 -0.09 0.30 0.51 0.72 1.14 4546.58 1.00
Woodiness -0.41 0.00 0.16 -0.72 -0.52 -0.41 -0.30 -0.09 3761.04 1.00
Herbaceous -0.62 0.00 0.26 -1.12 -0.79 -0.63 -0.45 -0.12 3278.30 1.00
Climate sum -0.19 0.00 0.11 -0.41 -0.26 -0.18 -0.11 0.03 4986.33 1.00
Number of species hybrids 0.02 0.00 0.16 -0.28 -0.08 0.02 0.13 0.34 3968.77 1.00
Hermaphroditic -0.12 0.00 0.26 -0.64 -0.29 -0.12 0.05 0.41 4766.16 1.00
Dioecious -0.19 0.00 0.19 -0.57 -0.32 -0.19 -0.06 0.19 4467.70 1.00
Monoecious 0.08 0.00 0.18 -0.28 -0.05 0.08 0.20 0.43 4738.70 1.00
Asexual 0.15 0.00 0.20 -0.23 0.02 0.16 0.29 0.55 4049.91 1.00
Fleshy fruit 0.24 0.00 0.18 -0.12 0.12 0.23 0.35 0.60 4917.81 1.00
Animal pollinated 0.17 0.00 0.31 -0.44 -0.04 0.17 0.37 0.77 4146.13 1.00
SD brownian effects 0.68 0.02 0.33 0.07 0.45 0.69 0.91 1.31 466.04 1.01
SD family specific effects 1.07 0.01 0.13 0.80 0.99 1.08 1.16 1.29 650.85 1.01
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/nonvet_rare.tex")

##Posterior predictive checks
pp_rare <- brms::posterior_predict(nonvet_rare)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=nonvet_rare$data$rare, pp_rare[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_nonvet <- apply(pp_rare, 1, function(x) nrmse(pred=x,obs=nonvet_rare$data$rare, type="iq"))
mean(nrmse_nonvet)
## [1] 0.419025
sd(nrmse_nonvet)
## [1] 0.04638537
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(nonvet_rare, hyp, class = NULL))
## Hypothesis Tests for class :
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0      0.3       0.2        0     0.72         NA
##   Post.Prob Star
## 1        NA    *
## ---
## '*': The expected value under the hypothesis lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

Invasive model

if (!file.exists("./fit_inv_10k.rds")) {

  fit_inv <- brm(Invasive | trials(species) ~ diversification_scaled + new_age_scaled + woodiness + herbaceous + climate.sum + species.hybrids + hermaphroditic + dioecious + monoecious + asexual + fleshy.fruit + animal + (1|family) + (1|family_name), 
    dat=data, family=binomial(), 
    iter=10000, thin=4, cov_ranef = list(family = phy_cov),
    control=list(adapt_delta=0.8, max_treedepth=11))

  saveRDS(fit_inv, "./fit_inv_10k.rds")

} else { fit_inv <- readRDS("./fit_inv_10k.rds")}

outcome <- summary(fit_inv$fit)
to_print <- outcome$summary[grep("^[sd,b]_*",rownames(outcome$summary)),]
to_print <- to_print[-1,]

names1 <- c("Diversification rate","Famly age", "Woodiness","Herbaceous","Climate sum","Number of species hybrids","Hermaphroditic","Dioecious","Monoecious","Asexual","Fleshy fruit","Animal pollinated","SD brownian effects","SD family specific effects")

fit_inv_plot <- stanplot(fit_inv, pars=rownames(to_print), type="intervals", 
                    prob=0.8, prob_outer=0.95, point_est="mean") + scale_y_discrete(labels=rev(names1), limits=rev(rownames(to_print)))+ ggtitle("Proportion of invasive species")
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
fit_inv_plot

# pdf("./plots_tables/fit_inv_plot.pdf", width=5, height=4)
# fit_inv_plot + ggtitle("")
# dev.off()

rownames(to_print) <- names1
kable(to_print, digits=2)
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
Diversification rate -1.05 0.01 0.42 -1.84 -1.34 -1.06 -0.78 -0.19 4923.67 1.00
Famly age -1.36 0.01 0.43 -2.17 -1.66 -1.37 -1.08 -0.51 4061.55 1.00
Woodiness 0.26 0.00 0.18 -0.09 0.14 0.26 0.38 0.62 5111.45 1.00
Herbaceous 0.18 0.00 0.32 -0.46 -0.03 0.19 0.40 0.80 4250.85 1.00
Climate sum 0.09 0.00 0.13 -0.17 0.00 0.09 0.18 0.36 4097.46 1.00
Number of species hybrids 0.48 0.00 0.16 0.16 0.38 0.49 0.60 0.80 4830.34 1.00
Hermaphroditic -0.15 0.00 0.32 -0.78 -0.37 -0.15 0.07 0.49 4695.99 1.00
Dioecious 0.01 0.00 0.21 -0.40 -0.13 0.02 0.15 0.42 4177.05 1.00
Monoecious 0.00 0.00 0.21 -0.42 -0.13 0.00 0.14 0.41 4507.56 1.00
Asexual 0.43 0.00 0.22 -0.01 0.28 0.43 0.58 0.86 4825.69 1.00
Fleshy fruit 0.01 0.00 0.19 -0.35 -0.11 0.01 0.14 0.39 4463.89 1.00
Animal pollinated -0.15 0.01 0.36 -0.85 -0.40 -0.14 0.10 0.57 4435.95 1.00
SD brownian effects 1.17 0.02 0.44 0.14 0.90 1.24 1.50 1.81 663.32 1.01
SD family specific effects 0.63 0.01 0.30 0.05 0.41 0.67 0.87 1.11 686.56 1.01
to_print <- to_print[,c("mean","sd","2.5%","50%","97.5%","n_eff","Rhat")]
print(xtable(to_print, type = "latex"), file = "./plots_tables/invasive.tex")

##Posterior predictive checks
pp_inv <- brms::posterior_predict(fit_inv)

log1 <- scale_x_continuous(trans="log1p")

ppc_dens_overlay(y=fit_inv$data$Invasive, pp_inv[1:50, ]) + scale_x_continuous(trans="log1p")

# NRMSE
nrmse_inv <- apply(pp_inv, 1, function(x) nrmse(pred=x,obs=fit_inv$data$Invasive, type="iq"))
mean(nrmse_inv)
## [1] 1.005749
sd(nrmse_inv)
## [1] 0.1460237
# family-level variance due to brownian motion component
hyp <- paste("sd_family__Intercept^2 /", "(sd_family__Intercept^2 + sd_family_name__Intercept^2) = 0")
(hyp <- hypothesis(fit_inv, hyp, class = NULL))
## Hypothesis Tests for class :
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (sd_family__Inter... = 0     0.69      0.29     0.02        1         NA
##   Post.Prob Star
## 1        NA    *
## ---
## '*': The expected value under the hypothesis lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.